
* ==========================================================================
* Part 1a -- SOEP-IS general release: self-control (SC) score for 2017
*
* Reads inno.dta, recodes the 13 items sc101-sc202 into SC101-SC202, sums
* them into SC, standardises the sum (SdSC) and saves the 2017 rows with a
* complete SC score as inno2017_V2.dta in OUTDATA.
* ==========================================================================
global SOEP "../SOEP/"
* NOTE(review): SOEP is a relative path, so each cd below changes the folder
* that every later "${SOEP}..." path resolves to. Confirm the directory
* layout, or make the global an absolute path.
cd "${SOEP}OUTPUT/"

di "************GENERAL RELEASE****************"
di "************LONG FORM PANEL ****************"
di "************2011-2018***********************"

use "${SOEP}RAW/soep-is.2019_stata_en/inno.dta",clear
cd "${SOEP}/OUTDATA/"
	tab syear
	tab sc101 syear

	* Items stored as 6 - value (reversed direction); codes <= 0 stay missing.
	foreach i in 102 103 104 105 107 108 110 111 201 {
		gen SC`i'=(6-sc`i') if sc`i'>0
	}
	* Items kept in their original direction; codes <= 0 stay missing.
	foreach i in 101 106 109 202 {
		gen SC`i'=sc`i' if sc`i'>0
	}

	* mdesc and the egen function rmiss2() are user-written add-ons and must
	* be installed before this section runs.
	mdesc SC* if syear==2017
	egen nmis2019=rmiss2(SC*)
	tab nmis2019


	 keep if SC101!=.
	capture drop SC SC2
	* Row sum of the recoded items; with option missing the sum is missing
	* when every item is missing.
	egen SC=rowtotal(SC*),missing
	replace SC=. if SC==0
	/*if one is not answered, turn into .*/
	foreach i in 102 103	 104 105 107 108 110 111 201 101 106 109 202 {
	replace SC=. if sc`i'<0 
	replace SC=. if sc`i'==.
	}
	* Standardised over all years still in memory (the 2017 filter comes later).
	egen SdSC=std(SC)

	 xtset pid syear
	gen year=syear
keep if SC!=.
keep if year==2017
* Saved with an explicit path (the same one the later merge reads from)
* instead of relying on the current working directory; replace allows the
* do-file to be re-run without aborting on the existing file.
save "${SOEP}OUTDATA/inno2017_V2.dta",replace

  
* ==========================================================================
* Part 1b -- controls from pgen.dta
*
* Builds employment status, marital status, education, a studying flag and
* the gross hourly wage on the full panel, copies the 2019 values of
* education and wage to every row of the same pid, keeps the 2017 rows and
* merges them 1:1 on pid with the 2017 self-control file.
* ==========================================================================
di "***********************************"
di "********Merge with controls var******"
di "*********Available 1998-2019*************"
di "*********Using 2017 first*************"


use "${SOEP}RAW/soep-is.2019_stata_en/pgen.dta",clear
	gen year=syear
	clonevar emp=pgemplst
	clonevar ms=pgfamstd
	clonevar edu=pgcasmin if pgcasmin!=-1
	gen edu_2019=pgcasmin if syear==2019
	* Missing values sort last, so after sorting on edu_2019 the first row of
	* each pid carries the 2019 value whenever one exists.
	bys pid (edu_2019): replace edu_2019=edu_2019[1] /*expand to the rest of the pid observations with 2019 values */
	
	clonevar yrs_edu=pgbilzt if inrange(pgbilzt,0,18)

	gen yrs_edu_2019=yrs_edu if syear==2019

	bys pid (yrs_edu_2019): replace yrs_edu_2019=yrs_edu_2019[1] /*expand to the rest of the pid observations with 2019 values */
	
	* 1 if pglfs equals 3, 0 for other positive codes, missing otherwise.
	gen studying=(pglfs==3) if pglfs>0 & pglfs!=.

	sort pid syear yrs_edu
	list pid syear yrs_edu yrs_edu_2019 in 2000/2200,sepby(pid) /*double check the expansion*/


	tab emp
	tab ms
	tab edu
	

di "/*Hourly wage*/"	
	*Working hours
	gen whrs=pgtatzt
	* Fall back to pgvebzt when pgtatzt is unusable. The rest of this file
	* treats negative values as missing-value codes, so mi() alone would
	* never trigger the fallback for those rows.
	replace whrs=pgvebzt if mi(pgtatzt) | pgtatzt<0
 
	*Hourly wage (gross)
	* Monthly gross labour income divided by 4.3 weeks of weekly hours.
	gen wage=pglabgro/(4.3*whrs)
	* Also removes the values produced by negative codes in either input.
	replace wage=. if wage<5
di " /*END code for hourly wage*/	"
	gen wage_2019=wage if year==2019
	bys pid (wage_2019): replace wage_2019=wage_2019[1] /*expand to the rest of the pid observations with 2019 values */
	bys year: sum wage pglabgro pgtatzt pgvebzt
	
keep if syear==2017 /*N=6000+*/


	label data "adding hourly wage in 2019"
save "${SOEP}OUTDATA/pgen_2017_V2.dta",replace


use "${SOEP}OUTDATA/pgen_2017_V2.dta",clear

* Keep only persons present in both the controls and the self-control file.
merge 1:1 pid using "${SOEP}OUTDATA/inno2017_V2",generate(_merge_pgen) /*64 in self control not matched*/
keep if _merge_pgen==3
save "${SOEP}OUTDATA/inno2017_pgen",replace


* ==========================================================================
* Part 1c -- life satisfaction and MCS/PCS health scores from p.dta (2017)
*
* Each sub-scale is rescaled to 0-100, turned into a z-score with the
* hard-coded centre/scale constants below, and combined into pcs and mcs as
* weighted sums of the eight z-scores, times 10 plus 50.
* NOTE(review): the constants look like published norm means, standard
* deviations and factor weights -- confirm their source.
* ==========================================================================
* clear added for consistency with every other use in this file: without it
* the command aborts when the data in memory has unsaved changes.
use "${SOEP}RAW/soep-is.2019_stata_en/p.dta",clear
keep plh0182 ple00* pid syear

	di "*********Life Satisfaction**************"
	* Values outside 0-10 become missing.
	clonevar ls=plh0182 if inrange(plh0182,0,10)

	di "*********MCS PCS**************"
	di "Code for creating MCS/PCS---"
	 
	rename ple0008 status
	rename ple0004 stairs
	rename ple0027 var1
	rename ple0028 var2
	rename ple0029 var3
	rename ple0030 var4
	rename ple0031 var5
	rename ple0032 var6
	rename ple0033 var7
	rename ple0034 var8
	rename ple0035 var9
	 
	* Negative codes are set to missing. var1-var9 is a dataset-order range,
	* so it covers whatever sits between var1 and var9 in the file.
	foreach x of varlist status stairs var1-var9 {
	replace `x'=. if `x'<0
	}
	 

	* 0-100 rescaling. pf100 is built from the single item stairs; status,
	* var2 and var3 enter reversed (6 - value).
	gen pf100 = ((((stairs)/1)-1)/2)*100
	gen rp100 = ((((var5+var6)/2)-1)/4)*100
	gen re100 = ((((var7+var8)/2)-1)/4)*100
	gen mh100 = ((((var1+(6 - var2))/2)-1)/4)*100
	gen bp100 = (((var4)-1)/4)*100
	gen gh100 = (((6 - status)-1)/4)*100
	gen vt100 = (((6 - var3)-1)/4)*100
	gen sf100 = (((var9)-1)/4)*100
	 
	* z-scores: (score - centre) / scale.
	gen pf_z = (pf100-73.14)/32.15
	gen rp_z = (rp100-74.29)/26.46
	gen bp_z = (bp100-73.27)/27.14
	gen gh_z = (gh100-60.57)/24.08
	gen vt_z = (vt100-52.94)/22.84
	gen sf_z = (sf100-83.22)/23.57
	gen re_z = (re100-81.94)/22.35
	gen mh_z = (mh100-61.96)/20.47
	 
	* Same z-scores moved to a scale centred at 50 with spread 10.
	gen pf_nbs = (pf_z*10)+50
	gen rp_nbs = (rp_z*10)+50
	gen bp_nbs = (bp_z*10)+50
	gen gh_nbs = (gh_z*10)+50
	gen vt_nbs = (vt_z*10)+50
	gen sf_nbs = (sf_z*10)+50
	gen re_nbs = (re_z*10)+50
	gen mh_nbs = (mh_z*10)+50
	 
	* Composite scores; any missing sub-scale makes the composite missing.
	gen pcs = (((0.414*pf_z) + (0.279*rp_z) + (0.331*bp_z) + (0.330*gh_z) + (-0.041*vt_z) + (-0.068*sf_z) + (-0.110*re_z) + (-0.244*mh_z))*10)+50
	gen mcs = (((-0.209*pf_z) + (-0.021*rp_z) + (-0.105*bp_z) + (-0.103*gh_z) + (0.258*vt_z) + (0.333*sf_z) + (0.378*re_z) + (0.489*mh_z))*10)+50

	di "*******************"
	di "END MCS PCS code"
	di "*******************"
keep if syear==2017

save "${SOEP}OUTDATA/p_selected2017_cleaned.dta",replace

* --------------------------------------------------------------------------
* Part 1d -- attach the 2017 person file (P) and the household file (H)
* Only matched observations survive each merge; the result is
* inno2017_pgen_P_H.
* --------------------------------------------------------------------------
use "${SOEP}OUTDATA/p_selected2017_cleaned.dta", clear
merge 1:1 pid using "${SOEP}OUTDATA/inno2017_pgen", generate(_mergeP)
* all SC rows are matched; unmatched rows come from P only and are dropped
drop if _mergeP != 3

save "${SOEP}OUTDATA/inno2017_pgen_P", replace

use "${SOEP}OUTDATA/inno2017_pgen_P", clear
merge m:1 hid syear using "${SOEP}RAW/soep-is.2019_stata_en/h.dta"
* all master rows are matched
drop if _merge != 3

* Log of hlc0005, defined for positive values only (103 missing values)
generate lginc = log(hlc0005) if hlc0005 > 0

save "${SOEP}OUTDATA/inno2017_pgen_P_H", replace
display "Merged 2017 file : inno2017_pgen_P_H"
display "**********END**********************************"
display "***********N=2750*******************************"
display "************************************************"







* ==========================================================================
* Part 2a -- SOEP-IS 2020 exclusive release: self-control (SC) score
*
* Same construction as for 2017, applied to the items isc101-isc202 of
* Inno20_ISK.dta. Saves the rows with a complete SC score as inno2020_V2.dta.
* ==========================================================================
di "************EXCLUSIVE RELEASE****************"
di "************2020 only **********************"
di "************N=3,785 ***********************"
  
use "${SOEP}RAW/SOEP_IS_2020_exclusive/Inno20_ISK.dta", clear

tab syear
tab isc101 syear

* Items stored as 6 - value (reversed direction); codes <= 0 stay missing.
foreach i in 102 103 104 105 107 108 110 111 201 {
	gen SC`i'=(6-isc`i') if isc`i'>0
}
* Items kept in their original direction; codes <= 0 stay missing.
foreach i in 101 106 109 202 {
	gen SC`i'=isc`i' if isc`i'>0
}
di "count missing N in SC"
* mdesc and rmiss2() are user-written add-ons; the two commented commands
* below show how to obtain them.
*ssc install mdesc
*search rmiss2

mdesc SC*
egen nmis2020=rmiss2(SC*)
tab nmis2020

keep if SC101!=.
capture drop SC SC2
* Row sum of the recoded items; with option missing the sum is missing when
* every item is missing.
egen SC=rowtotal(SC*),missing
replace SC=. if SC==0
/*if one is not answered, turn into .*/
foreach i in 102 103 104 105 107 108 110 111 201 101 106 109 202 {
replace SC=. if isc`i'<0 
replace SC=. if isc`i'==.
}
egen SdSC=std(SC)

xtset pid syear
 
keep if SC!=.
gen year=syear
 
* replace added so that re-running the do-file does not abort on the
* already existing output file.
save "${SOEP}OUTDATA/inno2020_V2.dta",replace

* --------------------------------------------------------------------------
* Part 2b -- 2020 person questionnaire (PB): life satisfaction and MCS/PCS
* Mirrors the 2017 construction, using the pges* items of the 2020 file.
* --------------------------------------------------------------------------
use "${SOEP}OUTDATA/inno2020_V2", clear
merge 1:1 pid using "${SOEP}RAW/SOEP_IS_2020_exclusive/Inno20_PB.dta", generate(_mergePB2020)

* all master rows are matched
drop if _mergePB2020 != 3

* Life satisfaction; values outside 0-10 become missing
clonevar ls = pzule1 if inrange(pzule1, 0, 10)
tabulate ls

display "*********MCS PCS**************"
display "Code for creating MCS/PCS---"

* Give the health items the same working names as in the 2017 section
* (pges03 "limits" is deliberately left out)
rename (pges01 pges02) (status stairs)
rename (pges05 pges06 pges07 pges08 pges09 pges10 pges11 pges12 pges13) ///
       (var1   var2   var3   var4   var5   var6   var7   var8   var9)

* Negative codes -> missing
foreach item of varlist status stairs var1-var9 {
	replace `item' = . if `item' < 0
}

* Sub-scales rescaled to 0-100; status, var2 and var3 enter reversed
generate pf100 = ((stairs/1 - 1)/2)*100
generate rp100 = (((var5 + var6)/2 - 1)/4)*100
generate re100 = (((var7 + var8)/2 - 1)/4)*100
generate mh100 = (((var1 + (6 - var2))/2 - 1)/4)*100
generate bp100 = ((var4 - 1)/4)*100
generate gh100 = (((6 - status) - 1)/4)*100
generate vt100 = (((6 - var3) - 1)/4)*100
generate sf100 = ((var9 - 1)/4)*100

* z-scores: (score - centre)/scale with the same constants as for 2017
generate pf_z = (pf100 - 73.14)/32.15
generate rp_z = (rp100 - 74.29)/26.46
generate bp_z = (bp100 - 73.27)/27.14
generate gh_z = (gh100 - 60.57)/24.08
generate vt_z = (vt100 - 52.94)/22.84
generate sf_z = (sf100 - 83.22)/23.57
generate re_z = (re100 - 81.94)/22.35
generate mh_z = (mh100 - 61.96)/20.47

* z-scores moved to a scale centred at 50 with spread 10
foreach scale in pf rp bp gh vt sf re mh {
	generate `scale'_nbs = (`scale'_z*10) + 50
}

* Composite physical (pcs) and mental (mcs) scores
generate pcs = (((0.414*pf_z) + (0.279*rp_z) + (0.331*bp_z) + (0.330*gh_z)      ///
              + (-0.041*vt_z) + (-0.068*sf_z) + (-0.110*re_z) + (-0.244*mh_z))*10) + 50
generate mcs = (((-0.209*pf_z) + (-0.021*rp_z) + (-0.105*bp_z) + (-0.103*gh_z)  ///
              + (0.258*vt_z) + (0.333*sf_z) + (0.378*re_z) + (0.489*mh_z))*10) + 50

display "*******************"
display "END MCS PCS code"
display "*******************"
	
	
	
* --------------------------------------------------------------------------
* Part 2c -- 2020 household file (HH) and child-birth indicator
* --------------------------------------------------------------------------
merge m:1 hid using "${SOEP}RAW/SOEP_IS_2020_exclusive/Inno20_HH.dta", generate(_mergeHH2020)
* Households that appear only in the HH file are not needed
drop if _mergeHH2020 == 2

* childbirth = 1 when any of hkgeba-hkgebf equals 2017, 2018, 2019 or 2020
generate childbirth = 0
forvalues birthyear = 2017/2020 {
	replace childbirth = 1 if inlist(`birthyear', hkgeba, hkgebb, hkgebc, hkgebd, hkgebe, hkgebf)
}

save "${SOEP}OUTDATA/inno2020_PB_HH.dta", replace
use "${SOEP}OUTDATA/inno2020_PB_HH.dta", clear

display "Merged 2020 file : inno2020_PB_HH"
display "**********END**********************************"
display "************************************************"






* --------------------------------------------------------------------------
* Part 3a -- stack the 2017 and 2020 files and keep the balanced panel
* After this section every remaining pid has exactly two rows.
* --------------------------------------------------------------------------
display "***********************************"
display "************APPEND 2017 2020*******"
display "************LONG FORM *************"
display "************N=4187***********************"
use "${SOEP}OUTDATA/inno2020_PB_HH.dta", clear
append using "${SOEP}OUTDATA/inno2017_pgen_P_H"

describe, short
tabulate year
drop if year == 2018

* Number of rows per person
bysort pid (SC): generate num_appear = _N
display "***********************"
display "keep only appear twice"
drop if num_appear != 2

* Change in SC from the earlier to the later row of each person
bysort pid (year): generate SC21 = SC[2] - SC[1]
bysort year: summarize SC, detail

xtset pid year

* Year-specific quintiles, then combined into one variable
xtile SC_quintile_2017 = SC if year == 2017, nquantiles(5)
xtile SC_quintile_2020 = SC if year == 2020, nquantiles(5)

generate SC_quintile = SC_quintile_2017
replace SC_quintile = SC_quintile_2020 if SC_quintile == .


* The 2019 education value stands in for 2020: first spread it over both
* rows of the person, then overwrite yrs_edu in the 2020 row with it
bysort pid (yrs_edu_2019): replace yrs_edu_2019 = yrs_edu_2019[1]
bysort pid: replace yrs_edu = yrs_edu_2019 if year == 2020



*save "${SOEP}OUTDATA/inno2017_2020_V3",replace /* long form* adding H for income for 2019 from V2 */

* Person-constant copies of the first-row and second-row SC values
* (rows ordered by year within pid)
duplicates report pid
bysort pid (year): generate SC2017 = SC[1]
bysort pid (year): generate SC2020 = SC[2]


*use "../OUTDATA/Cleaned_wide_2017_2020_V2.dta",clear

* --------------------------------------------------------------------------
* Part 3b -- decile, 5%-step and quartile groups of SC2017 and SC2020
*
* For each variable the cut points come from _pctile and stay in r(r#).
* Group 1 is assigned with <= r(r1); groups 2..k-1 use the half-open range
* r(r#-1) <= value < r(r#); the top group uses >= the last cut point.
* NOTE(review): a value exactly equal to r(r1) is first put in group 1 and
* then moved by the group-2 step whenever r(r1) < r(r2) -- confirm that this
* boundary handling is intended.
* --------------------------------------------------------------------------

* ---- deciles ----
capture drop Q10*
generate Q10SC2017 = .
generate Q10SC2020 = .

generate dif_SC = SC2020 - SC2017


foreach y in SC2017 SC2020 {
	_pctile `y', nquantiles(10)
	display "1"
	replace Q10`y' = 1 if `y' <= r(r1)
	forvalues upper = 2/9 {
		local lower = `upper' - 1
		display "`upper'"
		replace Q10`y' = `upper' if `y' >= r(r`lower') & `y' < r(r`upper')
	}
	display "10"
	replace Q10`y' = 10 if `y' >= r(r9)
}


tabulate Q10SC2017 Q10SC2020, cell

generate dif_Q10 = Q10SC2020 - Q10SC2017


* ---- 20 groups (5% steps) ----
display "****Finer unit 5%********"
capture drop Q20*
generate Q20SC2017 = .
generate Q20SC2020 = .


foreach y in SC2017 SC2020 {
	_pctile `y', nquantiles(20)
	display "1"
	replace Q20`y' = 1 if `y' <= r(r1)
	forvalues upper = 2/19 {
		local lower = `upper' - 1
		display "`upper'"
		replace Q20`y' = `upper' if `y' >= r(r`lower') & `y' < r(r`upper')
	}
	display "20"
	replace Q20`y' = 20 if `y' >= r(r19)
}


tabulate Q20SC2017 Q20SC2020, cell

generate dif_Q20 = Q20SC2020 - Q20SC2017
tabulate dif_Q20

* ---- quartiles ----
display "****quartile***********"

capture drop Q4*
generate Q4SC2017 = .
generate Q4SC2020 = .
foreach y in SC2017 SC2020 {
	_pctile `y', nquantiles(4)
	display "1"
	replace Q4`y' = 1 if `y' <= r(r1)
	forvalues upper = 2/3 {
		local lower = `upper' - 1
		display "`upper'"
		replace Q4`y' = `upper' if `y' >= r(r`lower') & `y' < r(r`upper')
	}
	display "4"
	replace Q4`y' = 4 if `y' >= r(r3)
}


generate dif_Q4 = Q4SC2020 - Q4SC2017
* adds childbirth in 2020, studying in 2017, and quintiles in both years
save "${SOEP}OUTDATA/inno2017_2020_V4", replace

* --------------------------------------------------------------------------
* Part 4a -- time-invariant characteristics from ppfad.dta: sex, age groups
* --------------------------------------------------------------------------
display "******************************************"
display "********Merge with time invariant X******"
display "********YOB ****************************"
* pid uniquely identifies rows in ppfad.dta
use "${SOEP}RAW/soep-is.2019_stata_en/ppfad.dta", clear
merge 1:m pid using "${SOEP}OUTDATA/inno2017_2020_V4", generate(_merge_ppfad)
drop if _merge_ppfad != 3

generate male = (sex == 1)
*di "Using Survey Year=2017"
generate Age = syear - gebjahr
tabulate Age

* Five-year age bands. Consecutive ranges share their end point and the
* later replace wins, so age 30 ends up in band 2, age 35 in band 3, and so
* on; ages above 100 and missing ages stay missing.
	generate agegroup = 0 if Age < 25
forvalues band = 1/10 {
	local from = 20 + 5*`band'
	local to   = `from' + 5
	replace agegroup = `band' if inrange(Age, `from', `to')
}
replace agegroup = 11 if inrange(Age, 75, 100)


label variable agegroup "Age groups"
capture label drop agegroup
label define agegroup  ///
	0  "Age 17-24"     ///
	1  "Age 25-29"     ///
	2  "Age 30-34"     ///
	3  "Age 35-39"     ///
	4  "Age 40-44"     ///
	5  "Age 45-49"     ///
	6  "Age 50-54"     ///
	7  "Age 55-59"     ///
	8  "Age 60-64"     ///
	9  "Age 65-69"     ///
	10 "Age 70-74"     ///
	11 "Age 75 above"
label values agegroup agegroup

* --------------------------------------------------------------------------
* Part 4b -- education fill-in, life-event shock indicators, income, wage
*
* Rows are ordered by year within pid wherever [1] / [2] subscripts are
* used, so [1] is the earlier and [2] the later row of a person. Statements
* are order-dependent (later replaces overwrite earlier ones).
* --------------------------------------------------------------------------
tabulate edu_2019 year
* Fill missing edu with the person's 2019 value, then blank non-positive codes
bysort pid (edu_2019): replace edu = edu_2019[1] if edu == .
replace edu = . if edu <= 0
display "************************"
display "***life events shock****"
display "************************"

		* Employment status: take perw where it is 1-4, collapse 5-9 into 5
		replace emp = perw if inrange(perw, 1, 4)
		replace emp = 5 if inrange(perw, 5, 9)

		* Employment shock: emp 1 -> 2..5, or emp 2 -> 4..5, between the two rows
		* (per the original notes: full time to others; part time to marginal
		* or not employed)
		generate empshock = 0
		bysort pid (year): replace empshock = 1 if emp[1] == 1 & inrange(emp[2], 2, 5)
		bysort pid (year): replace empshock = 1 if emp[1] == 2 & inrange(emp[2], 4, 5)

		* Marital status: map pfamst_n onto the ms coding; ms code 2 is folded
		* into code 1 (original note: no "married but separated" in 2020)
		tabulate ms
		replace ms = 1 if pfamst_n == 1
		replace ms = 1 if ms == 2
		replace ms = 3 if pfamst_n == 3
		replace ms = 4 if pfamst_n == 4
		replace ms = 5 if pfamst_n == 6
		replace ms = 6 if pfamst_n == 2
		replace ms = 7 if pfamst_n == 5

		tabulate ms year

		* Marital shock: ms 1 -> 3..5, or ms 6 -> 7, between the two rows
		* NOTE(review): meaning of the ms codes is not visible here -- confirm
		generate msshock = 0
		bysort pid (year): replace msshock = 1 if ms[1] == 1 & inrange(ms[2], 3, 5)
		bysort pid (year): replace msshock = 1 if ms[1] == 6 & ms[2] == 7


		* Death indicator: any of the listed pfs* items equals 1 (spousal death
		* included); the later row's value is then copied to both rows
		generate passaway = 0
		replace passaway = 1 if inlist(1, pfs081, pfs091, pfs121, pfs131, pfs071)
		bysort pid (year): replace passaway = 1 if passaway[2] == 1
		display "childbirth"
		* Non-missing childbirth sorts first, so it is spread to both rows
		bysort pid (childbirth): replace childbirth = childbirth[1]
		display "first job"
		* First job: studying in the earlier row, emp 1 or 2 in the later row
		generate firstjob = 0
		bysort pid (year): replace firstjob = 1 if studying[1] == 1 & inlist(emp[2], 1, 2)
		summarize empshock msshock passaway
	tabstat empshock msshock passaway, statistics(n mean sd) format(%9.3f)

		*bys pid (year): gen dif_SC=SC[2]-SC[1]

		egen SDdifSC = std(dif_SC)

		* Household log income is spread to both rows of the person
		summarize lginc
		bysort pid (lginc): replace lginc = lginc[1]

display "PERSON INCOME"
	* pglabgro is available for 2017 only, pbrut for 2020 only
	generate person_labgro_income = pglabgro if pglabgro >= 0
	generate grossearningslastmonth = pbrut if pbrut >= 0

	bysort pid (grossearningslastmonth): replace grossearningslastmonth = grossearningslastmonth[1]
	bysort pid (person_labgro_income): replace person_labgro_income = person_labgro_income[1]
	bysort year: summarize person_labgro_income grossearningslastmonth
	capture drop monthlyincome
	generate monthlyincome = person_labgro_income if year == 2017
	replace monthlyincome = grossearningslastmonth if year == 2020

	bysort year: summarize monthlyincome
display "HOURLY WAGE"
	* The smallest non-missing wage of the person is copied to both rows
	bysort year: summarize wage
	bysort pid (wage): replace wage = wage[1]
	bysort year: summarize wage


	* 1 if emp equals 5, 0 otherwise, missing when emp is missing
	generate unemp = emp == 5 if emp != .
	
	
* --------------------------------------------------------------------------
* Part 4c -- COVID exposure: days between the interview and 11 March 2020
* --------------------------------------------------------------------------
display "*********************"
display "*********************"
display "******COVID**********"
display "*********************"

*The World Health Organization (WHO) on March 11, 2020, has declared the novel coronavirus (COVID-19) outbreak a global pandemic 
* Interview date assembled from its day (pdatt), month (pdatm), year (pdatj)
generate interview_date  = pdatt
generate interview_month = pdatm
generate interview_year  = pdatj
generate interview = mdy(interview_month, interview_date, interview_year)
format interview %td

* Reference date: 11 March 2020
generate pandemic = mdy(3, 11, 2020)
format pandemic %td

* Days elapsed since the reference date (negative before it) and its square
generate sincepandemic    = interview - pandemic
generate sincepandemic_sq = sincepandemic*sincepandemic

label variable sincepandemic "COVID exposure"
label variable sincepandemic_sq "COVID exposure sq"
	label data "merged from inno2017_2020_V4 and birth years, added edu_2019, life event shocks (spousal passaway), wage, and COVID"

save "${SOEP}OUTDATA/inno2017_2020_ppfad_V3", replace
